Analysis Scripts
<<<<<<< HEAD
# # Ensure that pacman is installed for package management and loading.
# if (!require("pacman")) install.packages("pacman")
# # for data reading wrangling and visualization
pacman::p_load(tidyverse)
# for working directories
pacman::p_load(here)
# # for cross tabulation and data cleaning
# pacman::p_load(janitor)
# for working with strings
pacman::p_load(glue)
# For randomized inference, also loads randomizr and estimatr
pacman::p_load(ri2)
# for marginal effects from lineal regressions
pacman::p_load(margins)
# Tests for linear regression models
pacman::p_load(lmtest)
pacman::p_load(car)
# Tables
pacman::p_load(kableExtra)
# for updated ggplot2 theme
pacman::p_load(hrbrthemes)
# for updated ggplot2 colorblind-friendly scheme
pacman::p_load(ggthemes)
# theme_set(hrbrthemes::theme_ipsum())
pacman::p_load(reshape2)
# for plotting of covariate balance
pacman::p_load(cobalt)
# for matching only
pacman::p_load(MatchIt)
library(stargazer)
library(usmap)
library(ggplot2)
library(maps)
#install.packages("mapproj")
library(mapproj)
#install.packages("viridis")
library(viridis)
library(dplyr)
library(ggthemes)
library(sjPlot)
library(sjmisc)
library(here)
df <- read.csv(here('', 'rla_clean_5_12.csv'))
The above code will read in our dataset.
df$age <- as.numeric(df$age)
# exclude data with missing covariates
no_na_df <- df %>% drop_na('age', 'state', 'income_clean', 'party', 'gender_female', 'parent_yes', 'race_hispanic', 'edu_4college', 'edu_hs')
love.plot(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = no_na_df,
binary = "std",
limits = c(-.5, .5),
thresholds = c(m = .1)) # threshold determines the dotted lines on the graph
m.out <- matchit(dummy_treat ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = no_na_df,
method = "nearest", replace = TRUE) # perform matching
# plot covariate balance with and without matching
love.plot(m.out, binary = "std", thresholds = c(m = .1))
The above code will test our treatment groups to ensure they are balanced. Here, we are testing to ensure there is no statisical bias between groups. In other words, we want to make sure that men and women were approximately equally distributed amongst our treatment groups.
# people within 2 SD of duration
sd2 = sd(df$duration_sec)*2
duration_sd2_df <- no_na_df %>%
filter(abs(duration_sec - mean(duration_sec)) < sd2)
# check to see how many people are removed
rm(duration_sd2_df)
# people who passed attention check
atten_check_df <- no_na_df %>%
filter(attention_pass == 1)
love.plot(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = atten_check_df,
binary = "std",
limits = c(-.5, .5),
thresholds = c(m = .1)) # threshold determines the dotted lines on the graph
m.out <- matchit(dummy_treat ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = atten_check_df,
method = "nearest", replace = TRUE) # perform matching
# plot covariate balance with and without matching
love.plot(m.out, binary = "std", thresholds = c(m = .1))
The above code will analyze the impact of keeping those respondents who failed the attention check in our data frame. If the impact is negligible, we plan to keep the attention check failures in the dataset to allow for a larger sample size.
# regression of covariates on treatment assignment variable
balance_lm <- lm(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + party + dv_pre_state_conf + dv_pre_national_conf, data = no_na_df) # factor(region) encodes the string variable as a factor for analysis
# Test whether all coefficients from the balancce_lm regression are equal to zero
# using heteroskedasticity-robust standard errors, denoted by hc2
car::linearHypothesis(balance_lm, c("gender_female = 0", "age = 0", "parent_yes = 0", "race_hispanic = 0", "edu_hs = 0", "dv_pre_state_conf = 0", "dv_pre_national_conf = 0"),
test = "F", white.adjust = "hc2", singular.ok = TRUE)
summary(balance_lm)
The above code uses a linear regression to conduct similar balance checks to those above. This is used as a robustness check.
# Define a function to generate a balance check table using two-sample t-test
# Parameters: "cov_list" includes covariates, "treat" indicates the treatment, "alpha" is the significance level
t_table <- function(data, cov_list, treat,
alpha = 0.05) {
# for each covariate in cov_list, apply a function to conduct the t-test
out <- lapply(cov_list, function(x) {
tres <- t.test(data[[x]] ~ data[[treat]]) # t-test result between control and treatment groups' covariate x
# capture the group means for both control and treatment, and the p-value
c(mean_control = as.numeric(tres$estimate[1]), mean_treat = as.numeric(tres$estimate[2]), p_value = tres$p.value)
})
# save results by binding the results for all above covariates in cov_list by row (rbind)
out <- as.data.frame(do.call(rbind, out))
# combine covaraite names and results by column (cbind)
out <- cbind(covariate = cov_list, out)
# get rid of non-important strings in the names
names(out) <- gsub("[^0-9A-Za-z_]", "", names(out)) # gsub(pattern, replacement, x) replaces pattern in x with replacement
# code presentation of p-value according to whether they are below a threshold, for e.g., 0.001
out$p_value <- ifelse(out$p_value < 0.001,
"<0.001",
round(out$p_value, 3) # rounding of p values to 3 decimal places
)
return(out)
}
# Apply above t-t_table function to our dataset and selected covariates
balance_table <- t_table(
data = df,
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# output the balance table
balance_table %>%
knitr::kable() %>%
kableExtra::kable_styling(bootstrap_options = "striped")
The above code is also looking at the balance between treatment groups, using a two sample t-test.
# balance table for Democrats
balance_dems <- t_table(
data = subset(df, party=="Democrat"), # subsetting data to those with "party" equal to "Democrat"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# balance table for Republicans
balance_repubs <- t_table(
data = subset(df, party=="Republican"), # subsetting data to those with "party" equal to "Republican"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
balance_indpt_other <- t_table(
data = subset(df, party=="Independent" | party == "Other Party"), # subsetting data to those with "party" equal to "Republican"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# combine the 4 t-tables, excluding first column of covariate names for the latter 3, so that only one covariate name column is there
balance_party = cbind(balance_dems, balance_repubs %>% select(2:4), balance_indpt_other %>% select(2:4))
# Format: adding in headers
x <- knitr::kable(balance_party, digits = 2) %>% kable_styling()
# add in a header to label what we're cross-tabulating with
add_header_above(x, c('', 'Democrats'=3, 'Republicans'=3, 'Other and Independent'=3))
Above, we check for balance among political parties.
df[c(6:10,12,19:21,23,27,29,30,32,35:42,44,45)] <- lapply(df[c(6:10,12,19:21,23,27,29,30,32,35:42,44,45)], factor)
atten_check_df$treatment_group <- relevel(factor(atten_check_df$treatment_group), ref = "Control")
# run regressions to estimate treatment effect with robust standard errors
# run the regression for state confidence level
reg1 <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf, data = df)
# run the regression for national confidence level
reg2 <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf, data = df)
# run the regression with controls for state level
reg3 <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# run the regression with controls for national level
reg4 <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# compare with those who did not fail attention check
reg1_att <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf, data = atten_check_df)
reg2_att <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf, data = atten_check_df)
reg3_att <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = atten_check_df)
reg4_att <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = atten_check_df)
# do results vary by those who failed the attention check?
# compare by controlling for attention check
reg1_att_con <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + attention_pass, data = df)
# do results look the same?
# run the regression for national confidence level
reg2_att_con <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + attention_pass, data = df)
# do the results look the same?
# run the regression with controls for state level
reg3_att_con <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_state, data = df)
# do results look the same?
# run the regression with controls for national level
reg4_att_con <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_federal, data = df)
# do the results look the same?
The code above contains important elements of our analysis. First, it will run the state and national level regressions to determine the effect of being treated versus controlled. The initial regressions do not control for any covariates. Then it will run a similar regression, this time with controls in place. These initial regressions do not include respondents that fail the attention check. Then, it runs these same regressions including the group that failed the attention check and will assess whether or not there is a statistically significant difference. This chunk of code will test our first hypothesis, determining whether being treated (despite treatment message) is different than not being treated.
# run the regression with treatment groups
df$treatment_group <- relevel(factor(df$treatment_group), ref = "Control")
#state level with treatment groups
reg5 <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national with treatment groups
reg6 <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# compared with those who did not fail the attention check
reg5_att <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = atten_check_df)
# are results different?
reg6_att <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# are results different?
# compare controlling for the attention check
#state level with treatment groups
reg5_att_con <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_state, data = df)
# are results different?
# national with treatment groups
reg6_att_con <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_federal, data = df)
# are results different?
# we can get rid of the extra regressions now
rm(reg1_att, reg1_att_con,reg2_att, reg2_att_con,reg3_att, reg3_att_con, reg4_att, reg4_att_con,reg5_att, reg5_att_con,reg6_att, reg6_att_con)
rm(atten_check_df)
Like the previous block of code, this code contains analysis important to our experiment. Namely, it tests our second hypothesis for differences in treatment effects by message. The regressions are run at the state and national level with controls. Again, we plan to compare the respondents with and without attention check failures.
# run a linear regression to measure the effect of treatments on the secondary effect question
reg_state_seceffect <- lm(seceffect ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
The above code is the linear regression for our secondary dependent variable, which will be used as a robustness check.
labs <- c("Bipartisan", "Hand Count", "Local", "Loser", "RLA Percentage", "Soup")
# for the state level with controls
lm_state <- bind_rows(list(tidy(reg5))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
p4 <- ggplot(lm_state, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "steelblue2") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.75, .75)) +
ggtitle('Post-Test State Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
# for the national level
lm_national <- bind_rows(list(tidy(reg6))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
# plot treatment effect
p5 <- ggplot(lm_national, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "firebrick") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.75, .75)) +
ggtitle('Post-Test National Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
# for the state level secondary effect with controls
lm_state_sec <- bind_rows(list(tidy(reg_state_seceffect))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
sec_effect <- ggplot(lm_state_sec, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "steelblue2") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.5, 1)) +
ggtitle('Secondary Effect State Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
p4
p5
sec_effect
The code above will plot the treatment effects.
# simple difference in means for those treated with a message
with(df, mean(dv_post_state_conf[dummy_treat == 1]) - mean(dv_post_state_conf[dummy_treat == 0])) #with(data, expr, …) evaluates the expr on the data
with(df, mean(dv_post_national_conf[dummy_treat == 1]) - mean(dv_post_national_conf[dummy_treat == 0])) #with(data, expr, …) evaluates the expr on the data
The code above will run a simple t-test to determine whether or not being treated has an effect.
# Use the estimatr package for difference in means estimates
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df)
# difference_in_means(y ~ x, data) computes mean(y when x==1) - mean(y when x==0) in data, along with standard errors and p-values from two-sided t-tests
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df)
This code will also run a t-test to determine whether or not being treated has an effect.
# difference in means for women at state level
df$gender_female <- to_factor(df$gender_female)
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = gender_female == "1")
# difference in means for women at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = gender_female == "1")
# difference in means for age at state level
df$age <- to_numeric(df$age)
# age < 40
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = age < 40)
# age > 40
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = age > 40)
# difference in means for age at national level
# age > 40
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = age > 40)
# difference in means for HS education at state level
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = edu_hs == "1")
# difference in means for HS education at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = edu_hs== "1")
# difference in means for college education at state level
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = edu_4college == "1")
# difference in means for college education at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = edu_4college == "1")
The code above will run a t-test for many of the heterogeneous treatment effects for which we have interest.
# Gender interaction
#state level interaction for gender
reg_state_fem <- lm(dv_post_state_conf ~ treatment_group*gender_female + dv_pre_state_conf + age + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level interaction for gender
reg_natl_fem <- lm(dv_post_national_conf ~ treatment_group*gender_female + dv_pre_national_conf + age + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between gender and treatement effect, which will be used to determine heterogeneous treatment effects based on gender.
# Age interaction
#state level
reg_state_age <- lm(dv_post_state_conf ~ treatment_group*age + gender_female + dv_pre_state_conf + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level
reg_natl_age <- lm(dv_post_national_conf ~ treatment_group*age + gender_female + dv_pre_national_conf + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between age and treatement effect, which will be used to determine heterogeneous treatment effects based on age.
# Education interaction
# state only HS
reg_state_edu_hs <- lm(dv_post_state_conf ~ treatment_group*edu_hs + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + party + trust_state, data = df)
# national only HS
reg_natl_edu_hs <- lm(dv_post_national_conf ~ treatment_group*edu_hs + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + party + trust_federal, data = df)
# state college or above
reg_state_edu <- lm(dv_post_state_conf ~ treatment_group*edu_4college + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_hs + party + trust_state, data = df)
# national college or above
reg_natl_edu <- lm(dv_post_national_conf ~ treatment_group*edu_4college + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between education and treatement effect, which will be used to determine heterogeneous treatment effects based on education.
# income interaction
# state
reg_state_inc <- lm(dv_post_state_conf ~ treatment_group*income_num + dv_pre_state_conf + age + gender_female + parent_yes + state + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national
reg_natl_inc <- lm(dv_post_national_conf ~ treatment_group*income_num + dv_pre_national_conf + age + gender_female + parent_yes + state + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between income and treatement effect, which will be used to determine heterogeneous treatment effects based on income.
# always vs never believer interaction
df$always_believer <- to_factor(df$always_believer)
df$never_believer <- to_factor(df$never_believer)
# state never believer
reg_state_never <- lm(dv_post_state_conf ~ treatment_group*never_believer + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national never believer
reg_natl_never <- lm(dv_post_national_conf ~ treatment_group*never_believer + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state always believer
reg_state_always <- lm(dv_post_state_conf ~ treatment_group*always_believer + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_state, data = df)
# national always believer
reg_natl_always <- lm(dv_post_national_conf ~ treatment_group*always_believer + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between ‘always believers & never believers’ and treatement effect, which will be used to determine heterogeneous treatment effects based on belief in the government.
# political party interaction
# state level
df$party <- to_factor(df$party)
reg_state_party <- lm(dv_post_state_conf ~ treatment_group*party + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_state, data = df)
# national level
reg_natl_party <- lm(dv_post_national_conf ~ treatment_group*party + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_federal, data = df)
The code above will create a linear regression with interaction between political party and treatement effect, which will be used to determine heterogeneous treatment effects based on political party.
# race interaction
# state level race_hispanic Y/N
reg_state_hisp <- lm(dv_post_state_conf ~ treatment_group*race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_hispanic Y/N
reg_natl_hisp <- lm(dv_post_national_conf ~ treatment_group*race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state level race_white
reg_state_white <- lm(dv_post_state_conf ~ treatment_group*race_white + race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_white
reg_natl_white <- lm(dv_post_national_conf ~ treatment_group*race_white + race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state level race_black
reg_state_black <- lm(dv_post_state_conf ~ treatment_group*race_black + race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_black
reg_natl_black <- lm(dv_post_national_conf ~ treatment_group*race_black + race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between race and treatement effect, which will be used to determine heterogeneous treatment effects based on race.
# state level middle views ( > 1 and < 10)
reg_middle_views_state <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party, data = df, subset = (dv_pre_state_conf < 10 & dv_pre_state_conf > 1))
# national middle views ( > 1 and < 10)
reg_middle_views_natl <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party, data = df, subset = (dv_pre_state_conf < 10 & dv_pre_state_conf > 1))
=======
# # Ensure that pacman is installed for package management and loading.
# if (!require("pacman")) install.packages("pacman")
# # for data reading wrangling and visualization
pacman::p_load(tidyverse)
# for working directories
pacman::p_load(here)
# # for cross tabulation and data cleaning
# pacman::p_load(janitor)
# for working with strings
pacman::p_load(glue)
# For randomized inference, also loads randomizr and estimatr
pacman::p_load(ri2)
# for marginal effects from lineal regressions
pacman::p_load(margins)
# Tests for linear regression models
pacman::p_load(lmtest)
pacman::p_load(car)
# Tables
pacman::p_load(kableExtra)
# for updated ggplot2 theme
pacman::p_load(hrbrthemes)
# for updated ggplot2 colorblind-friendly scheme
pacman::p_load(ggthemes)
# theme_set(hrbrthemes::theme_ipsum())
pacman::p_load(reshape2)
# for plotting of covariate balance
pacman::p_load(cobalt)
# for matching only
pacman::p_load(MatchIt)
library(stargazer)
library(usmap)
library(ggplot2)
library(maps)
#install.packages("mapproj")
library(mapproj)
#install.packages("viridis")
library(viridis)
library(dplyr)
library(ggthemes)
library(sjPlot)
library(sjmisc)
library(here)
df <- read.csv(here('', 'rla_clean_5_12.csv'))
The above code will read in our dataset.
df$age <- as.numeric(df$age)
# exclude data with missing covariates
no_na_df <- df %>% drop_na('age', 'state', 'income_clean', 'party', 'gender_female', 'parent_yes', 'race_hispanic', 'edu_4college', 'edu_hs')
love.plot(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = no_na_df,
binary = "std",
limits = c(-.5, .5),
thresholds = c(m = .1)) # threshold determines the dotted lines on the graph
m.out <- matchit(dummy_treat ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = no_na_df,
method = "nearest", replace = TRUE) # perform matching
# plot covariate balance with and without matching
love.plot(m.out, binary = "std", thresholds = c(m = .1))
The above code will test our treatment groups to ensure they are balanced. Here, we are testing to ensure there is no statisical bias between groups. In other words, we want to make sure that men and women were approximately equally distributed amongst our treatment groups.
# people within 2 SD of duration
sd2 = sd(df$duration_sec)*2
duration_sd2_df <- no_na_df %>%
filter(abs(duration_sec - mean(duration_sec)) < sd2)
# check to see how many people are removed
rm(duration_sd2_df)
# people who passed attention check
atten_check_df <- no_na_df %>%
filter(attention_pass == 1)
love.plot(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = atten_check_df,
binary = "std",
limits = c(-.5, .5),
thresholds = c(m = .1)) # threshold determines the dotted lines on the graph
m.out <- matchit(dummy_treat ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + dv_pre_state_conf + dv_pre_national_conf + party,
data = atten_check_df,
method = "nearest", replace = TRUE) # perform matching
# plot covariate balance with and without matching
love.plot(m.out, binary = "std", thresholds = c(m = .1))
The above code will analyze the impact of keeping those respondents who failed the attention check in our data frame. If the impact is negligible, we plan to keep the attention check failures in the dataset to allow for a larger sample size.
# regression of covariates on treatment assignment variable
balance_lm <- lm(treatment_group_num ~ age + gender_female + parent_yes + state + income_clean + race_hispanic + edu_4college + edu_hs + party + dv_pre_state_conf + dv_pre_national_conf, data = no_na_df) # factor(region) encodes the string variable as a factor for analysis
# Test whether all coefficients from the balancce_lm regression are equal to zero
# using heteroskedasticity-robust standard errors, denoted by hc2
car::linearHypothesis(balance_lm, c("gender_female = 0", "age = 0", "parent_yes = 0", "race_hispanic = 0", "edu_hs = 0", "dv_pre_state_conf = 0", "dv_pre_national_conf = 0"),
test = "F", white.adjust = "hc2", singular.ok = TRUE)
summary(balance_lm)
The above code uses a linear regression to conduct similar balance checks to those above. This is used as a robustness check.
# Define a function to generate a balance check table using two-sample t-test
# Parameters: "cov_list" includes covariates, "treat" indicates the treatment, "alpha" is the significance level
t_table <- function(data, cov_list, treat,
alpha = 0.05) {
# for each covariate in cov_list, apply a function to conduct the t-test
out <- lapply(cov_list, function(x) {
tres <- t.test(data[[x]] ~ data[[treat]]) # t-test result between control and treatment groups' covariate x
# capture the group means for both control and treatment, and the p-value
c(mean_control = as.numeric(tres$estimate[1]), mean_treat = as.numeric(tres$estimate[2]), p_value = tres$p.value)
})
# save results by binding the results for all above covariates in cov_list by row (rbind)
out <- as.data.frame(do.call(rbind, out))
# combine covaraite names and results by column (cbind)
out <- cbind(covariate = cov_list, out)
# get rid of non-important strings in the names
names(out) <- gsub("[^0-9A-Za-z_]", "", names(out)) # gsub(pattern, replacement, x) replaces pattern in x with replacement
# code presentation of p-value according to whether they are below a threshold, for e.g., 0.001
out$p_value <- ifelse(out$p_value < 0.001,
"<0.001",
round(out$p_value, 3) # rounding of p values to 3 decimal places
)
return(out)
}
# Apply above t-t_table function to our dataset and selected covariates
balance_table <- t_table(
data = df,
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# output the balance table
balance_table %>%
knitr::kable() %>%
kableExtra::kable_styling(bootstrap_options = "striped")
The above code is also looking at the balance between treatment groups, using a two sample t-test.
# balance table for Democrats
balance_dems <- t_table(
data = subset(df, party=="Democrat"), # subsetting data to those with "party" equal to "Democrat"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# balance table for Republicans
balance_repubs <- t_table(
data = subset(df, party=="Republican"), # subsetting data to those with "party" equal to "Republican"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
balance_indpt_other <- t_table(
data = subset(df, party=="Independent" | party == "Other Party"), # subsetting data to those with "party" equal to "Republican"
c("gender_female", "parent_yes", "race_hispanic", "race_white", "race_asian", "race_black", "edu_hs", "edu_4college", "trust_federal", "trust_state"),
"dummy_treat"
)
# combine the 4 t-tables, excluding first column of covariate names for the latter 3, so that only one covariate name column is there
balance_party = cbind(balance_dems, balance_repubs %>% select(2:4), balance_indpt_other %>% select(2:4))
# Format: adding in headers
x <- knitr::kable(balance_party, digits = 2) %>% kable_styling()
# add in a header to label what we're cross-tabulating with
add_header_above(x, c('', 'Democrats'=3, 'Republicans'=3, 'Other and Independent'=3))
Above, we check for balance among political parties.
df[c(6:10,12,19:21,23,27,29,30,32,35:42,44,45)] <- lapply(df[c(6:10,12,19:21,23,27,29,30,32,35:42,44,45)], factor)
atten_check_df$treatment_group <- relevel(factor(atten_check_df$treatment_group), ref = "Control")
# run regressions to estimate treatment effect with robust standard errors
# run the regression for state confidence level
reg1 <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf, data = df)
# run the regression for national confidence level
reg2 <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf, data = df)
# run the regression with controls for state level
reg3 <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# run the regression with controls for national level
reg4 <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# compare with those who did not fail attention check
reg1_att <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf, data = atten_check_df)
reg2_att <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf, data = atten_check_df)
reg3_att <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = atten_check_df)
reg4_att <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = atten_check_df)
# do results vary by those who failed the attention check?
# compare by controlling for attention check
reg1_att_con <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + attention_pass, data = df)
# do results look the same?
# run the regression for national confidence level
reg2_att_con <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + attention_pass, data = df)
# do the results look the same?
# run the regression with controls for state level
reg3_att_con <- lm(dv_post_state_conf ~ dummy_treat + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_state, data = df)
# do results look the same?
# run the regression with controls for national level
reg4_att_con <- lm(dv_post_national_conf ~ dummy_treat + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_federal, data = df)
# do the results look the same?
The code above contains important elements of our analysis. First, it will run the state and national level regressions to determine the effect of being treated versus controlled. The initial regressions do not control for any covariates. Then it will run a similar regression, this time with controls in place. These initial regressions do not include respondents that fail the attention check. Then, it runs these same regressions including the group that failed the attention check and will assess whether or not there is a statistically significant difference. This chunk of code will test our first hypothesis, determining whether being treated (despite treatment message) is different than not being treated.
# run the regression with treatment groups
df$treatment_group <- relevel(factor(df$treatment_group), ref = "Control")
#state level with treatment groups
reg5 <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national with treatment groups
reg6 <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# compared with those who did not fail the attention check
reg5_att <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = atten_check_df)
# are results different?
reg6_att <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# are results different?
# compare controlling for the attention check
#state level with treatment groups
reg5_att_con <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_state, data = df)
# are results different?
# national with treatment groups
reg6_att_con <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + attention_pass + trust_federal, data = df)
# are results different?
# we can get rid of the extra regressions now
rm(reg1_att, reg1_att_con,reg2_att, reg2_att_con,reg3_att, reg3_att_con, reg4_att, reg4_att_con,reg5_att, reg5_att_con,reg6_att, reg6_att_con)
rm(atten_check_df)
Like the previous block of code, this code contains analysis important to our experiment. Namely, it tests our second hypothesis for differences in treatment effects by message. The regressions are run at the state and national level with controls. Again, we plan to compare the respondents with and without attention check failures.
# run a linear regression to measure the effect of treatments on the secondary effect question
reg_state_seceffect <- lm(seceffect ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
The above code is the linear regression for our secondary dependent variable, which will be used as a robustness check.
labs <- c("Bipartisan", "Hand Count", "Local", "Loser", "RLA Percentage", "Soup")
# for the state level with controls
lm_state <- bind_rows(list(tidy(reg5))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
p4 <- ggplot(lm_state, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "steelblue2") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.75, .75)) +
ggtitle('Post-Test State Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
# for the national level
lm_national <- bind_rows(list(tidy(reg6))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
# plot treatment effect
p5 <- ggplot(lm_national, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "firebrick") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.75, .75)) +
ggtitle('Post-Test National Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
# for the state level secondary effect with controls
lm_state_sec <- bind_rows(list(tidy(reg_state_seceffect))) %>%
filter((term %in% c("treatment_groupControl", "treatment_groupBipartisan", "treatment_groupHandcount", "treatment_groupLocal", "treatment_groupLoser", "treatment_groupRL_percentage", "treatment_groupSoup")))
sec_effect <- ggplot(lm_state_sec, aes(x = term,y =estimate)) +
geom_point(position=position_dodge(width=0.5)) +
geom_errorbar(aes(x=term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position=position_dodge(width=0.5), color = "steelblue2") +
xlab('Treatment') +
ylab('Estimate') +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
geom_hline(yintercept = 0, colour = 'grey60', linetype = 2) +
coord_cartesian(ylim=c(-.5, 1)) +
ggtitle('Secondary Effect State Outcome w/ controls (conditioned on pre-test)') +
scale_x_discrete(labels = labs)
p4
p5
sec_effect
The code above will plot the treatment effects.
# simple difference in means for those treated with a message
with(df, mean(dv_post_state_conf[dummy_treat == 1]) - mean(dv_post_state_conf[dummy_treat == 0])) #with(data, expr, …) evaluates the expr on the data
with(df, mean(dv_post_national_conf[dummy_treat == 1]) - mean(dv_post_national_conf[dummy_treat == 0])) #with(data, expr, …) evaluates the expr on the data
The code above will run a simple t-test to determine whether or not being treated has an effect.
# Use the estimatr package for difference in means estimates
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df)
# difference_in_means(y ~ x, data) computes mean(y when x==1) - mean(y when x==0) in data, along with standard errors and p-values from two-sided t-tests
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df)
This code will also run a t-test to determine whether or not being treated has an effect.
# difference in means for women at state level
df$gender_female <- to_factor(df$gender_female)
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = gender_female == "1")
# difference in means for women at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = gender_female == "1")
# difference in means for age at state level
df$age <- to_numeric(df$age)
# age < 40
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = age < 40)
# age > 40
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = age > 40)
# difference in means for age at national level
# age > 40
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = age > 40)
# difference in means for HS education at state level
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = edu_hs == "1")
# difference in means for HS education at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = edu_hs== "1")
# difference in means for college education at state level
estimatr::difference_in_means(dv_state_treatment_diff ~ dummy_treat, data = df, subset = edu_4college == "1")
# difference in means for college education at national level
estimatr::difference_in_means(dv_national_treatment_diff ~ dummy_treat, data = df, subset = edu_4college == "1")
The code above will run a t-test for many of the heterogeneous treatment effects for which we have interest.
# Gender interaction
#state level interaction for gender
reg_state_fem <- lm(dv_post_state_conf ~ treatment_group*gender_female + dv_pre_state_conf + age + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level interaction for gender
reg_natl_fem <- lm(dv_post_national_conf ~ treatment_group*gender_female + dv_pre_national_conf + age + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between gender and treatement effect, which will be used to determine heterogeneous treatment effects based on gender.
# Age interaction
#state level
reg_state_age <- lm(dv_post_state_conf ~ treatment_group*age + gender_female + dv_pre_state_conf + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level
reg_natl_age <- lm(dv_post_national_conf ~ treatment_group*age + gender_female + dv_pre_national_conf + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between age and treatement effect, which will be used to determine heterogeneous treatment effects based on age.
# Education interaction
# state only HS
reg_state_edu_hs <- lm(dv_post_state_conf ~ treatment_group*edu_hs + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + party + trust_state, data = df)
# national only HS
reg_natl_edu_hs <- lm(dv_post_national_conf ~ treatment_group*edu_hs + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + party + trust_federal, data = df)
# state college or above
reg_state_edu <- lm(dv_post_state_conf ~ treatment_group*edu_4college + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_hs + party + trust_state, data = df)
# national college or above
reg_natl_edu <- lm(dv_post_national_conf ~ treatment_group*edu_4college + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between education and treatement effect, which will be used to determine heterogeneous treatment effects based on education.
# income interaction
# state
reg_state_inc <- lm(dv_post_state_conf ~ treatment_group*income_num + dv_pre_state_conf + age + gender_female + parent_yes + state + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national
reg_natl_inc <- lm(dv_post_national_conf ~ treatment_group*income_num + dv_pre_national_conf + age + gender_female + parent_yes + state + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between income and treatement effect, which will be used to determine heterogeneous treatment effects based on income.
# always vs never believer interaction
df$always_believer <- to_factor(df$always_believer)
df$never_believer <- to_factor(df$never_believer)
# state never believer
reg_state_never <- lm(dv_post_state_conf ~ treatment_group*never_believer + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national never believer
reg_natl_never <- lm(dv_post_national_conf ~ treatment_group*never_believer + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state always believer
reg_state_always <- lm(dv_post_state_conf ~ treatment_group*always_believer + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_state, data = df)
# national always believer
reg_natl_always <- lm(dv_post_national_conf ~ treatment_group*always_believer + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between ‘always believers & never believers’ and treatement effect, which will be used to determine heterogeneous treatment effects based on belief in the government.
# political party interaction
# state level
df$party <- to_factor(df$party)
reg_state_party <- lm(dv_post_state_conf ~ treatment_group*party + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_state, data = df)
# national level
reg_natl_party <- lm(dv_post_national_conf ~ treatment_group*party + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + trust_federal, data = df)
The code above will create a linear regression with interaction between political party and treatement effect, which will be used to determine heterogeneous treatment effects based on political party.
# race interaction
# state level race_hispanic Y/N
reg_state_hisp <- lm(dv_post_state_conf ~ treatment_group*race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_hispanic Y/N
reg_natl_hisp <- lm(dv_post_national_conf ~ treatment_group*race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state level race_white
reg_state_white <- lm(dv_post_state_conf ~ treatment_group*race_white + race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_black + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_white
reg_natl_white <- lm(dv_post_national_conf ~ treatment_group*race_white + race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_black + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
# state level race_black
reg_state_black <- lm(dv_post_state_conf ~ treatment_group*race_black + race_hispanic + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_asian + edu_4college + edu_hs + party + trust_state, data = df)
# national level race_black
reg_natl_black <- lm(dv_post_national_conf ~ treatment_group*race_black + race_hispanic + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_white + race_asian + edu_4college + edu_hs + party + trust_federal, data = df)
The code above will create a linear regression with interaction between race and treatement effect, which will be used to determine heterogeneous treatment effects based on race.
# state level middle views ( > 1 and < 10)
reg_middle_views_state <- lm(dv_post_state_conf ~ treatment_group + dv_pre_state_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party, data = df, subset = (dv_pre_state_conf < 10 & dv_pre_state_conf > 1))
# national middle views ( > 1 and < 10)
reg_middle_views_natl <- lm(dv_post_national_conf ~ treatment_group + dv_pre_national_conf + age + gender_female + parent_yes + state + income_clean + race_hispanic + race_white + race_black + race_asian + edu_4college + edu_hs + party, data = df, subset = (dv_pre_state_conf < 10 & dv_pre_state_conf > 1))
>>>>>>> d5cde666c4204b15698eef9b43b1f1e4e454b840
The code above will create a linear regression without including those respondents in the extremes (respondents who answered either a 1 or 10). We can use this regression to analyze whether or not those respondents not in the extremes have different treatment effects than those in the extremes.